## show empirical correlation between spending and compensation ##
library(dplyr)
library(sandwich)
library(stargazer)
library(ggplot2)

# All relative paths used by this script ("Original Data/", "docs/", "figs/")
# resolve against this directory.
# NOTE(review): the path is specific to one machine's Dropbox layout.
setwd("~/Dropbox/Project_fundraising/dataverse/")

# Carnes-Hansen replication data.
# The two party spending columns and total compensation are divided by 1,000.
# mutate() evaluates its arguments in order, so tot_spend is the sum of the
# already-rescaled party spending columns.
chdat <- read.csv("Original Data/carnes_hansen_salary_replication.csv") %>%
  mutate(
    nimsp_dem_spending = nimsp_dem_spending / 1000,
    nimsp_rep_spending = nimsp_rep_spending / 1000,
    total_comp = total_comp / 1000,
    tot_spend = nimsp_dem_spending + nimsp_rep_spending
  )

# FollowTheMoney (NIMSP) download: lower-case the column names, shorten three of
# them, and keep only rows whose election year is 2010, 2011 or 2012.
# NOTE(review): nimsp is not referenced again in the visible part of the script.
nimsp <- read.csv(
  "Original Data/FollowTheMoneyDownload20180319(040359).csv",
  stringsAsFactors = FALSE
)
names(nimsp) <- tolower(names(nimsp))
nimsp <- nimsp %>%
  rename(
    year = election_year,
    state = election_jurisdiction,
    tot = total_.
  ) %>%
  filter(year %in% 2010:2012)
  



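# Carnes-Hansen has compensation in 2006 and salary for 2007 and 2012.
# TODO: merge NIMSP data from 2012. No merge is performed yet; the models below
# use the spending columns already present in chdat.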



# OLS of spending (total, Democratic, Republican) on total compensation.
# NOTE(review): these models use every row of chdat, whereas the compensation
# figure later in this script filters to year == 2012 -- confirm that pooling
# is intended.
totreg <- lm(tot_spend ~ total_comp, data = chdat)
demreg <- lm(nimsp_dem_spending ~ total_comp, data = chdat)
repreg <- lm(nimsp_rep_spending ~ total_comp, data = chdat)

# HC0 heteroskedasticity-consistent covariance matrix for each model.
totreg_vcv <- vcovHC(totreg, type = "HC0")
demreg_vcv <- vcovHC(demreg, type = "HC0")
repreg_vcv <- vcovHC(repreg, type = "HC0")

# Robust standard errors: square roots of the covariance diagonals.
totreg_se <- sqrt(diag(totreg_vcv))
demreg_se <- sqrt(diag(demreg_vcv))
repreg_se <- sqrt(diag(repreg_vcv))

# Print the coefficient table for the total-spending model using the HC0
# covariance matrix.
lmtest::coeftest(totreg, vcov. = totreg_vcv)

# Regression table for the three spending models, written to
# docs/tables/spend_compensation.tex.
stargazer(
  totreg, demreg, repreg,
  # report the HC0 standard errors instead of the default lm ones
  se = list(totreg_se, demreg_se, repreg_se),
  # labelling
  dep.var.labels = c("Total Spending", "Dem. Spending", "Rep. Spending"),
  covariate.labels = "Compensation",
  # a single significance threshold, one decimal place
  star.cutoffs = 0.05,
  digits = 1,
  # drop the F statistic, residual std. error, adjusted R-squared and the notes
  omit.stat = c("f", "ser", "adj.rsq"),
  omit.table.layout = "n",
  # LaTeX layout
  float = FALSE,
  no.space = TRUE,
  align = TRUE,
  out = "docs/tables/spend_compensation.tex"
)



# Scatter of total spending against total compensation (both in thousands),
# drawing one state_ab text label per row of chdat, plus a linear fit.
# The y axis uses a log10 scale; ggplot2 applies scale transformations before
# computing statistics, so the stat_smooth() line is fitted to log10(spending).
ggplot(chdat) +
  aes(x = total_comp, y = tot_spend, label = state_ab) +
  geom_text() +
  stat_smooth(method = "lm", se = FALSE, colour = "black") +
  scale_x_continuous(
    breaks = seq(0, 100, 25),
    labels = function(x) paste0("$", x)
  ) +
  scale_y_log10(
    # scientific = FALSE is forwarded to format(); without it a break such as
    # 1e5 is rendered as "$1e+05" instead of "$100,000".
    labels = function(x) {
      paste0("$", prettyNum(x, big.mark = ",", scientific = FALSE))
    }
  ) +
  theme_minimal() +
  labs(
    x = "Total compensation (thousands)",
    y = "Total spending in legislative races\n(thousands, log scale)"
  )
# ggsave() with no plot argument saves the most recently created ggplot.
ggsave("figs/carnes_comp_spending.pdf", width = 6, height = 4)




#  show distribution of legislator compensation ---------------------------

# Horizontal bar chart of 2012 legislative compensation (thousands), with
# states ordered by compensation.
chdat %>%
  filter(year == 2012) %>%
  arrange(total_comp) %>%
  # Fix the factor levels in sorted order so the bars follow total_comp rather
  # than alphabetical order.
  mutate(state_ab = factor(state_ab, levels = unique(state_ab))) %>%
  ggplot() +
  aes(x = state_ab, y = total_comp) +
  geom_col() +
  # Reference lines at 15, 50 and 80 on the compensation axis.
  # NOTE(review): the rationale for these thresholds is not stated in the script.
  geom_hline(yintercept = c(15, 50, 80)) +
  coord_flip() +
  scale_y_continuous(labels = function(x) paste0("$", x)) +
  labs(x = NULL, y = "Legislative compensation (thousands)") +
  theme_minimal() +
  theme(
    panel.grid = element_blank(),
    panel.spacing = unit(0, "in"),
    text = element_text(size = 14)
  )
# ggsave() with no plot argument saves the most recently created ggplot.
ggsave("figs/carnes_leg_comp_2012.pdf", width = 6, height = 8)




